This patch adds CPU hot-add support to the system.
a) It marks all CPUs as possible at boot if CONFIG_HOTPLUG_CPU is
set. Note that this will increase the per_cpu area.
b) When a CPU is added through a hypercall, the CPU is marked as
present and offline, and its NUMA information is set up if NUMA is
supported. The CPU must then be brought online explicitly by dom0.
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
#warning ACPI uses CMPXCHG, i486 and later hardware
#endif
-#define MAX_MADT_ENTRIES 256
u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
{[0 ... MAX_MADT_ENTRIES - 1] = 0xff };
EXPORT_SYMBOL(x86_acpiid_to_apicid);
/* Have we found an MP table */
int smp_found_config;
-unsigned int __initdata maxcpus = NR_CPUS;
+unsigned int __devinitdata maxcpus = NR_CPUS;
/*
* Various Linux-internal data structures created from the
return MP_processor_info(&processor);
}
+/*
+ * Undo the bookkeeping done for a registered local APIC.
+ *
+ * Nothing is changed when @cpu is index 0, when @apic_id is the boot CPU's
+ * physical APIC id, or when @cpu is not currently mapped to @apic_id.
+ * Otherwise the APIC id is removed from phys_cpu_present_map, the
+ * cpu -> APIC id mapping is reset to BAD_APICID and the CPU is cleared
+ * from cpu_present_map.
+ */
+void mp_unregister_lapic(uint32_t apic_id, uint32_t cpu)
+{
+	if (cpu == 0 ||
+	    apic_id == boot_cpu_physical_apicid ||
+	    x86_cpu_to_apicid[cpu] != apic_id)
+		return;
+
+	x86_cpu_to_apicid[cpu] = BAD_APICID;
+	physid_clear(apic_id, phys_cpu_present_map);
+	cpu_clear(cpu, cpu_present_map);
+}
+
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
/* Default NUMA to off for now. acpi=on required to enable it. */
-int numa_off __initdata = 1;
+int numa_off __devinitdata = 1;
-int acpi_numa __initdata;
+int acpi_numa __devinitdata;
/*
* Given a shift value, try to populate memnodemap[]
* 0 if memnodmap[] too small (of shift too small)
* -1 if node overlap or lost ram (shift too big)
*/
-static int __init
+static int __devinit
populate_memnodemap(const struct node *nodes, int numnodes, int shift)
{
int i;
* prior to this call, and this initialization is good enough
* for the fake NUMA cases.
*/
-void __init init_cpu_to_node(void)
+void __devinit init_cpu_to_node(void)
{
int i;
for (i = 0; i < NR_CPUS; i++) {
}
break;
+ case XENPF_cpu_hotadd:
+ ret = cpu_add(op->u.cpu_add.apic_id,
+ op->u.cpu_add.acpi_id,
+ op->u.cpu_add.pxm);
+ break;
+
default:
ret = -ENOSYS;
break;
setup_idle_pagetable();
}
-static void __init srat_detect_node(int cpu)
+void __devinit srat_detect_node(int cpu)
{
unsigned node;
u32 apicid = x86_cpu_to_apicid[cpu];
smp_prepare_boot_cpu();
+#ifdef CONFIG_HOTPLUG_CPU
+ prefill_possible_map();
+#endif
+
/* We initialise the serial devices very early so we can get debugging. */
ns16550.io_base = 0x3f8;
ns16550.irq = 4;
*/
smpboot_restore_warm_reset_vector();
}
+
+/*
+ * Mark every CPU index in [0, NR_CPUS) as possible in cpu_possible_map.
+ * Always returns 0.
+ */
+int prefill_possible_map(void)
+{
+    int cpu = 0;
+
+    while (cpu < NR_CPUS) {
+        cpu_set(cpu, cpu_possible_map);
+        cpu++;
+    }
+
+    return 0;
+}
+
+/*
+ * Register a hot-added CPU.
+ *
+ * @apic_id: APIC id handed to mp_register_lapic().
+ * @acpi_id: ACPI processor id; index into x86_acpiid_to_apicid[].
+ * @pxm:     value handed to setup_node() to pick the NUMA node (only used
+ *           when srat_disabled() is false).
+ *
+ * Returns the CPU index obtained from mp_register_lapic() on success, or a
+ * negative errno value:
+ *   -ENOSYS  built without CONFIG_ACPI
+ *   -EINVAL  an argument is out of range, or @acpi_id is already bound to
+ *            a different APIC id
+ *   -EEXIST  this CPU has already been added
+ *   other    the error returned by mp_register_lapic() or setup_node()
+ */
+int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm)
+{
+    int cpu = -1;
+
+#ifndef CONFIG_ACPI
+    return -ENOSYS;
+#endif
+
+    dprintk(XENLOG_DEBUG, "cpu_add apic_id %x acpi_id %x pxm %x\n",
+            apic_id, acpi_id, pxm);
+
+    /*
+     * All three values are used as array/bitmap indices below, so the
+     * limits are exclusive: x86_acpiid_to_apicid[] has exactly
+     * MAX_MADT_ENTRIES elements, and an acpi_id equal to MAX_MADT_ENTRIES
+     * would index one past its end.
+     *
+     * NOTE(review): apicid_to_node[] is declared with 256 entries and
+     * x86_acpiid_to_apicid[] stores u8 values with 0xff meaning "unused";
+     * if MAX_APICS can exceed 255 a tighter apic_id bound is needed --
+     * confirm against the MAX_APICS definition.
+     */
+    if ( acpi_id >= MAX_MADT_ENTRIES || apic_id >= MAX_APICS || pxm >= 256 )
+        return -EINVAL;
+
+    /*
+     * Take the lock before looking at any shared state so that two
+     * concurrent requests for the same CPU cannot both pass the
+     * "already added" checks.
+     */
+    spin_lock(&cpu_add_remove_lock);
+
+    /* Detect if the cpu has been added before */
+    if ( x86_acpiid_to_apicid[acpi_id] != 0xff )
+    {
+        cpu = (x86_acpiid_to_apicid[acpi_id] != apic_id) ? -EINVAL : -EEXIST;
+        goto out;
+    }
+
+    if ( physid_isset(apic_id, phys_cpu_present_map) )
+    {
+        cpu = -EEXIST;
+        goto out;
+    }
+
+    cpu = mp_register_lapic(apic_id, 1);
+    if ( cpu < 0 )
+        goto out;
+
+    x86_acpiid_to_apicid[acpi_id] = apic_id;
+
+    if ( !srat_disabled() )
+    {
+        int node = setup_node(pxm);
+
+        if ( node < 0 )
+        {
+            /* Roll back everything registered above before failing. */
+            dprintk(XENLOG_WARNING, "Setup node failed for pxm %x\n", pxm);
+            x86_acpiid_to_apicid[acpi_id] = 0xff;
+            mp_unregister_lapic(apic_id, cpu);
+            cpu = node;
+            goto out;
+        }
+        apicid_to_node[apic_id] = node;
+    }
+
+    srat_detect_node(cpu);
+    numa_add_cpu(cpu);
+    dprintk(XENLOG_INFO, "Add CPU %x with index %x\n", apic_id, cpu);
+
+ out:
+    spin_unlock(&cpu_add_remove_lock);
+    return cpu;
+}
+
+
#else /* ... !CONFIG_HOTPLUG_CPU */
int __cpu_disable(void)
{
/* We said "no" in __cpu_disable */
BUG();
}
+
+int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm)
+{
+ return -ENOSYS; /* stub for builds without CONFIG_HOTPLUG_CPU: hot-add is not available */
+}
#endif /* CONFIG_HOTPLUG_CPU */
int __devinit __cpu_up(unsigned int cpu)
return (signed char)pxm2node[pxm];
}
-static __init int setup_node(int pxm)
+__devinit int setup_node(int pxm)
{
unsigned node = pxm2node[pxm];
if (node == 0xff) {
apicid_to_node[i] = NUMA_NO_NODE;
}
-static __init inline int srat_disabled(void)
-{
- return numa_off || acpi_numa < 0;
-}
-
/*
* A lot of BIOS fill in 10 (= no distance) everywhere. This messes
* up the NUMA heuristics which wants the local node to have a smaller
#endif /* CONFIG_ACPI_SLEEP */
+#define MAX_MADT_ENTRIES 256
extern u8 x86_acpiid_to_apicid[];
#define MAX_LOCAL_APIC 256
extern void numa_init_array(void);
extern int numa_off;
+/*
+ * Returns non-zero when numa_off is set or acpi_numa is negative,
+ * and zero otherwise.
+ */
+static __devinit inline int srat_disabled(void)
+{
+	if (numa_off)
+		return 1;
+
+	return acpi_numa < 0;
+}
extern void numa_set_node(int cpu, int node);
+extern int setup_node(int pxm);
+extern void srat_detect_node(int cpu);
extern void setup_node_bootmem(int nodeid, u64 start, u64 end);
extern unsigned char apicid_to_node[256];
extern void cpu_uninit(void);
extern void disable_nonboot_cpus(void);
extern void enable_nonboot_cpus(void);
+int prefill_possible_map(void);
+int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm);
#else
static inline int cpu_is_offline(int cpu) {return 0;}
static inline void disable_nonboot_cpus(void) {}
typedef struct xenpf_cpu_ol xenpf_cpu_ol_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t);
+#define XENPF_cpu_hotadd 58
+struct xenpf_cpu_hotadd /* argument block for the XENPF_cpu_hotadd platform op */
+{
+ uint32_t apic_id; /* forwarded to cpu_add() as the APIC id of the new CPU */
+ uint32_t acpi_id; /* forwarded to cpu_add() as the ACPI id of the new CPU */
+ uint32_t pxm; /* forwarded to cpu_add(), which hands it to setup_node() to pick the NUMA node */
+};
+
struct xen_platform_op {
uint32_t cmd;
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
struct xenpf_set_processor_pminfo set_pminfo;
struct xenpf_pcpuinfo pcpu_info;
struct xenpf_cpu_ol cpu_ol;
+ struct xenpf_cpu_hotadd cpu_add;
uint8_t pad[128];
} u;
};